import os

from marker.config.parser import ConfigParser
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered

# Configure the OpenAI-compatible LLM service used by marker.
#
# SECURITY: the API key is read from the OPENAI_API_KEY environment variable
# rather than being hard-coded, so the secret never lands in source control.
# If the variable is unset, the value is None and the run is expected to fail
# once the service needs credentials (not verified here -- confirm against
# marker's service implementation).
#
# NOTE(review): the default endpoint below uses plain http://, so the key
# would travel unencrypted; prefer an https:// endpoint. Set OPENAI_BASE_URL
# to override the default without editing this file.
config = {
    "use_llm": True,
    "llm_service": "marker.services.openai.OpenAIService",
    "openai_api_key": os.environ.get("OPENAI_API_KEY"),
    "openai_base_url": os.environ.get(
        "OPENAI_BASE_URL", "http://106.54.60.26:3400/v1"
    ),
    "openai_model": "gpt-4o-mini",  # or another model
    "output_format": "markdown",
}

# Wrap the raw settings so the converter's pieces can be derived from them.
config_parser = ConfigParser(config)

# Gather the constructor arguments first (evaluated top to bottom), then
# build the PDF converter from them.
_converter_kwargs = {
    "config": config_parser.generate_config_dict(),
    "artifact_dict": create_model_dict(),
    "processor_list": config_parser.get_processors(),
    "renderer": config_parser.get_renderer(),
    "llm_service": config_parser.get_llm_service(),
}
converter = PdfConverter(**_converter_kwargs)

# Run the conversion on the input PDF and unpack the rendered result into
# its text, metadata and images parts.
rendered = converter("14468608-1.pdf")
text, metadata, images = text_from_rendered(rendered)